{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "import numpy as np\n",
    "import pandas  as pd \n",
    "# import numpy as np\n",
    "import os\n",
    "import xgboost as xgb\n",
    "os.chdir(r'D:\\pythonCode\\company_talent_loss\\data')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T02:33:15.104899200Z",
     "start_time": "2025-06-07T02:33:14.263476800Z"
    }
   },
   "id": "initial_id"
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1100 entries, 0 to 1099\n",
      "Data columns (total 31 columns):\n",
      " #   Column                    Non-Null Count  Dtype \n",
      "---  ------                    --------------  ----- \n",
      " 0   Attrition                 1100 non-null   int64 \n",
      " 1   Age                       1100 non-null   int64 \n",
      " 2   BusinessTravel            1100 non-null   object\n",
      " 3   Department                1100 non-null   object\n",
      " 4   DistanceFromHome          1100 non-null   int64 \n",
      " 5   Education                 1100 non-null   int64 \n",
      " 6   EducationField            1100 non-null   object\n",
      " 7   EmployeeNumber            1100 non-null   int64 \n",
      " 8   EnvironmentSatisfaction   1100 non-null   int64 \n",
      " 9   Gender                    1100 non-null   object\n",
      " 10  JobInvolvement            1100 non-null   int64 \n",
      " 11  JobLevel                  1100 non-null   int64 \n",
      " 12  JobRole                   1100 non-null   object\n",
      " 13  JobSatisfaction           1100 non-null   int64 \n",
      " 14  MaritalStatus             1100 non-null   object\n",
      " 15  MonthlyIncome             1100 non-null   int64 \n",
      " 16  NumCompaniesWorked        1100 non-null   int64 \n",
      " 17  Over18                    1100 non-null   object\n",
      " 18  OverTime                  1100 non-null   object\n",
      " 19  PercentSalaryHike         1100 non-null   int64 \n",
      " 20  PerformanceRating         1100 non-null   int64 \n",
      " 21  RelationshipSatisfaction  1100 non-null   int64 \n",
      " 22  StandardHours             1100 non-null   int64 \n",
      " 23  StockOptionLevel          1100 non-null   int64 \n",
      " 24  TotalWorkingYears         1100 non-null   int64 \n",
      " 25  TrainingTimesLastYear     1100 non-null   int64 \n",
      " 26  WorkLifeBalance           1100 non-null   int64 \n",
      " 27  YearsAtCompany            1100 non-null   int64 \n",
      " 28  YearsInCurrentRole        1100 non-null   int64 \n",
      " 29  YearsSinceLastPromotion   1100 non-null   int64 \n",
      " 30  YearsWithCurrManager      1100 non-null   int64 \n",
      "dtypes: int64(23), object(8)\n",
      "memory usage: 266.5+ KB\n"
     ]
    }
   ],
   "source": [
    "data = pd.read_csv(r'..\\data\\train.csv')\n",
    "data.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T02:33:18.768442400Z",
     "start_time": "2025-06-07T02:33:18.750716Z"
    }
   },
   "id": "79542ce42ff8574"
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1100 entries, 0 to 1099\n",
      "Data columns (total 31 columns):\n",
      " #   Column                    Non-Null Count  Dtype \n",
      "---  ------                    --------------  ----- \n",
      " 0   Attrition                 1100 non-null   int64 \n",
      " 1   Age                       1100 non-null   int64 \n",
      " 2   BusinessTravel            1100 non-null   object\n",
      " 3   Department                1100 non-null   object\n",
      " 4   DistanceFromHome          1100 non-null   int64 \n",
      " 5   Education                 1100 non-null   int64 \n",
      " 6   EducationField            1100 non-null   object\n",
      " 7   EmployeeNumber            1100 non-null   int64 \n",
      " 8   EnvironmentSatisfaction   1100 non-null   int64 \n",
      " 9   Gender                    1100 non-null   object\n",
      " 10  JobInvolvement            1100 non-null   int64 \n",
      " 11  JobLevel                  1100 non-null   int64 \n",
      " 12  JobRole                   1100 non-null   object\n",
      " 13  JobSatisfaction           1100 non-null   int64 \n",
      " 14  MaritalStatus             1100 non-null   object\n",
      " 15  MonthlyIncome             1100 non-null   int64 \n",
      " 16  NumCompaniesWorked        1100 non-null   int64 \n",
      " 17  Over18                    1100 non-null   object\n",
      " 18  OverTime                  1100 non-null   object\n",
      " 19  PercentSalaryHike         1100 non-null   int64 \n",
      " 20  PerformanceRating         1100 non-null   int64 \n",
      " 21  RelationshipSatisfaction  1100 non-null   int64 \n",
      " 22  StandardHours             1100 non-null   int64 \n",
      " 23  StockOptionLevel          1100 non-null   int64 \n",
      " 24  TotalWorkingYears         1100 non-null   int64 \n",
      " 25  TrainingTimesLastYear     1100 non-null   int64 \n",
      " 26  WorkLifeBalance           1100 non-null   int64 \n",
      " 27  YearsAtCompany            1100 non-null   int64 \n",
      " 28  YearsInCurrentRole        1100 non-null   int64 \n",
      " 29  YearsSinceLastPromotion   1100 non-null   int64 \n",
      " 30  YearsWithCurrManager      1100 non-null   int64 \n",
      "dtypes: int64(23), object(8)\n",
      "memory usage: 266.5+ KB\n"
     ]
    }
   ],
   "source": [
    "df=data.copy()\n",
    "df.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T02:33:20.934336400Z",
     "start_time": "2025-06-07T02:33:20.933832600Z"
    }
   },
   "id": "93b2849a6f54214b"
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [],
   "source": [
    "df = data.copy()\n",
    "df.drop(['Over18','StandardHours','EmployeeNumber','Gender'],axis='columns',inplace=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T02:33:23.334647900Z",
     "start_time": "2025-06-07T02:33:23.332642700Z"
    }
   },
   "id": "23420f2bb62e9d7a"
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "data": {
      "text/plain": "0        5993\n1       10502\n2        6074\n3       12742\n4        2596\n        ...  \n1095     4014\n1096     5405\n1097     6334\n1098     2472\n1099     2042\nName: MonthlyIncome, Length: 1100, dtype: int64"
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 考察 JobInvolvement 和离职率的关系\n",
    "df1 = df.copy()\n",
    "cols=df1.columns.drop('Attrition')\n",
    "pd.DataFrame().to_excel(\"../data/ana_data.xlsx\")\n",
    "# df2 =pd.crosstab(df1['MonthlyIncome'],df1['Attrition']).reset_index()\n",
    "for col in cols:\n",
    "    df2 =pd.crosstab(df1[col],df1['Attrition']).reset_index()\n",
    "    df2['ratio'] = df2[1] / (df2[1]+df2[0])\n",
    "    with pd.ExcelWriter(\"../data/ana_data.xlsx\",mode='a') as writer:\n",
    "        df2.to_excel(writer,sheet_name=col)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T03:02:57.712490100Z",
     "start_time": "2025-06-07T03:02:55.881030500Z"
    }
   },
   "id": "c5aafd7ddeed5fb3"
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1100 entries, 0 to 1099\n",
      "Data columns (total 27 columns):\n",
      " #   Column                    Non-Null Count  Dtype  \n",
      "---  ------                    --------------  -----  \n",
      " 0   Attrition                 1100 non-null   int64  \n",
      " 1   Age                       1100 non-null   float64\n",
      " 2   BusinessTravel            1100 non-null   float64\n",
      " 3   Department                1100 non-null   float64\n",
      " 4   DistanceFromHome          1100 non-null   float64\n",
      " 5   Education                 1100 non-null   float64\n",
      " 6   EducationField            1100 non-null   float64\n",
      " 7   EnvironmentSatisfaction   1100 non-null   float64\n",
      " 8   JobInvolvement            1100 non-null   float64\n",
      " 9   JobLevel                  1100 non-null   float64\n",
      " 10  JobRole                   1100 non-null   float64\n",
      " 11  JobSatisfaction           1100 non-null   float64\n",
      " 12  MaritalStatus             1100 non-null   float64\n",
      " 13  MonthlyIncome             1100 non-null   float64\n",
      " 14  NumCompaniesWorked        1100 non-null   float64\n",
      " 15  OverTime                  1100 non-null   float64\n",
      " 16  PercentSalaryHike         1100 non-null   float64\n",
      " 17  PerformanceRating         1100 non-null   float64\n",
      " 18  RelationshipSatisfaction  1100 non-null   float64\n",
      " 19  StockOptionLevel          1100 non-null   float64\n",
      " 20  TotalWorkingYears         1100 non-null   float64\n",
      " 21  TrainingTimesLastYear     1100 non-null   float64\n",
      " 22  WorkLifeBalance           1100 non-null   float64\n",
      " 23  YearsAtCompany            1100 non-null   float64\n",
      " 24  YearsInCurrentRole        1100 non-null   float64\n",
      " 25  YearsSinceLastPromotion   1100 non-null   float64\n",
      " 26  YearsWithCurrManager      1100 non-null   float64\n",
      "dtypes: float64(26), int64(1)\n",
      "memory usage: 232.2 KB\n"
     ]
    }
   ],
   "source": [
    "# 特征工程，给离散值赋予权重\n",
    "# 从exel文件中获取到对应的表，然后把两张表连接起来你\n",
    "# 然后用ration值替换离散值\n",
    "df3 = df.copy()\n",
    "cols = df3.columns.drop('Attrition')\n",
    "for item in cols.values:\n",
    "    df4 =pd.crosstab(df3[item],df3['Attrition']).reset_index()\n",
    "    df4['ratio'] = df4[1] / (df4[1]+ df4[0])\n",
    "    df3 = pd.merge(df3,df4[[item,'ratio']],on=item)\n",
    "    df3.drop(item,axis='columns',inplace=True)\n",
    "    df3.rename(columns={'ratio':item},inplace=True)\n",
    "df3.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T02:53:40.484849500Z",
     "start_time": "2025-06-07T02:53:40.382620600Z"
    }
   },
   "id": "9de88b185873273d"
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "outputs": [
    {
     "data": {
      "text/plain": "      Attrition       Age  BusinessTravel  Department  DistanceFromHome  \\\n0             0  0.108108         0.15629    0.140303          0.118056   \n1             0  0.000000         0.22439    0.140303          0.118056   \n2             1  0.132075         0.22439    0.140303          0.090909   \n3             0  0.166667         0.15629    0.140303          0.118056   \n4             1  0.285714         0.22439    0.140303          0.118056   \n...         ...       ...             ...         ...               ...   \n1095          0  0.118644         0.15629    0.140303          0.200000   \n1096          0  0.051282         0.15629    0.202417          0.144654   \n1097          0  0.108108         0.15629    0.202417          0.230769   \n1098          1  0.416667         0.15629    0.140303          0.090909   \n1099          1  0.322581         0.22439    0.140303          0.144654   \n\n      Education  EducationField  EnvironmentSatisfaction  JobInvolvement  \\\n0      0.156146        0.151515                 0.246512        0.168498   \n1      0.156146        0.151515                 0.139053        0.146747   \n2      0.167053        0.151515                 0.246512        0.380952   \n3      0.214286        0.151515                 0.139053        0.168498   \n4      0.167053        0.136499                 0.246512        0.168498   \n...         ...             ...                      ...             ...   \n1095   0.156146        0.136499                 0.133531        0.146747   \n1096   0.156146        0.212598                 0.157143        0.380952   \n1097   0.156146        0.212598                 0.139053        0.168498   \n1098   0.214286        0.151515                 0.139053        0.146747   \n1099   0.167053        0.151515                 0.246512        0.146747   \n\n      JobLevel  ...  PerformanceRating  RelationshipSatisfaction  \\\n0     0.107769  ...           
0.154506                  0.158824   \n1     0.127389  ...           0.154506                  0.204545   \n2     0.107769  ...           0.202381                  0.142415   \n3     0.037037  ...           0.154506                  0.158824   \n4     0.259709  ...           0.154506                  0.204545   \n...        ...  ...                ...                       ...   \n1095  0.259709  ...           0.154506                  0.158824   \n1096  0.107769  ...           0.202381                  0.204545   \n1097  0.107769  ...           0.154506                  0.142415   \n1098  0.259709  ...           0.202381                  0.204545   \n1099  0.259709  ...           0.154506                  0.152074   \n\n      StockOptionLevel  TotalWorkingYears  TrainingTimesLastYear  \\\n0             0.085202           0.261538               0.181818   \n1             0.085202           0.142857               0.181818   \n2             0.251586           0.130435               0.145119   \n3             0.085202           0.000000               0.145119   \n4             0.065574           0.522388               0.181818   \n...                ...                ...                    ...   \n1095          0.085202           0.183673               0.145119   \n1096          0.065574           0.045455               0.191489   \n1097          0.065574           0.130435               0.181818   \n1098          0.251586           0.522388               0.181818   \n1099          0.085202           0.160920               0.181818   \n\n      WorkLifeBalance  YearsAtCompany  YearsInCurrentRole  \\\n0            0.194175        0.128571            0.000000   \n1            0.269841        0.132450            0.168675   \n2            0.141593        0.109375            0.133333   \n3            0.141593        0.100000            0.038462   \n4            0.141593        0.358209            0.288043   \n...               ...             ...                 ...   
\n1095         0.141593        0.212766            0.189091   \n1096         0.175781        0.150000            0.189091   \n1097         0.141593        0.358209            0.288043   \n1098         0.141593        0.358209            0.288043   \n1099         0.141593        0.144330            0.189091   \n\n      YearsSinceLastPromotion  YearsWithCurrManager  \n0                    0.191011              0.147239  \n1                    0.149606              0.150685  \n2                    0.191011              0.100000  \n3                    0.000000              0.083333  \n4                    0.191011              0.319797  \n...                       ...                   ...  \n1095                 0.162602              0.138462  \n1096                 0.191011              0.134615  \n1097                 0.191011              0.319797  \n1098                 0.191011              0.319797  \n1099                 0.149606              0.138462  \n\n[1100 rows x 27 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Attrition</th>\n      <th>Age</th>\n      <th>BusinessTravel</th>\n      <th>Department</th>\n      <th>DistanceFromHome</th>\n      <th>Education</th>\n      <th>EducationField</th>\n      <th>EnvironmentSatisfaction</th>\n      <th>JobInvolvement</th>\n      <th>JobLevel</th>\n      <th>...</th>\n      <th>PerformanceRating</th>\n      <th>RelationshipSatisfaction</th>\n      <th>StockOptionLevel</th>\n      <th>TotalWorkingYears</th>\n      <th>TrainingTimesLastYear</th>\n      <th>WorkLifeBalance</th>\n      <th>YearsAtCompany</th>\n      <th>YearsInCurrentRole</th>\n      <th>YearsSinceLastPromotion</th>\n      <th>YearsWithCurrManager</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>0.108108</td>\n      <td>0.15629</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.168498</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.085202</td>\n      <td>0.261538</td>\n      <td>0.181818</td>\n      <td>0.194175</td>\n      <td>0.128571</td>\n      <td>0.000000</td>\n      <td>0.191011</td>\n      <td>0.147239</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>0</td>\n      <td>0.000000</td>\n      <td>0.22439</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.139053</td>\n      <td>0.146747</td>\n      <td>0.127389</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.085202</td>\n      
<td>0.142857</td>\n      <td>0.181818</td>\n      <td>0.269841</td>\n      <td>0.132450</td>\n      <td>0.168675</td>\n      <td>0.149606</td>\n      <td>0.150685</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>0.132075</td>\n      <td>0.22439</td>\n      <td>0.140303</td>\n      <td>0.090909</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.380952</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.130435</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.109375</td>\n      <td>0.133333</td>\n      <td>0.191011</td>\n      <td>0.100000</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>0</td>\n      <td>0.166667</td>\n      <td>0.15629</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.214286</td>\n      <td>0.151515</td>\n      <td>0.139053</td>\n      <td>0.168498</td>\n      <td>0.037037</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.085202</td>\n      <td>0.000000</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.100000</td>\n      <td>0.038462</td>\n      <td>0.000000</td>\n      <td>0.083333</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>0.285714</td>\n      <td>0.22439</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.167053</td>\n      <td>0.136499</td>\n      <td>0.246512</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.065574</td>\n      <td>0.522388</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.358209</td>\n      <td>0.288043</td>\n      <td>0.191011</td>\n      <td>0.319797</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      
<td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>1095</th>\n      <td>0</td>\n      <td>0.118644</td>\n      <td>0.15629</td>\n      <td>0.140303</td>\n      <td>0.200000</td>\n      <td>0.156146</td>\n      <td>0.136499</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.085202</td>\n      <td>0.183673</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.212766</td>\n      <td>0.189091</td>\n      <td>0.162602</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>1096</th>\n      <td>0</td>\n      <td>0.051282</td>\n      <td>0.15629</td>\n      <td>0.202417</td>\n      <td>0.144654</td>\n      <td>0.156146</td>\n      <td>0.212598</td>\n      <td>0.157143</td>\n      <td>0.380952</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.204545</td>\n      <td>0.065574</td>\n      <td>0.045455</td>\n      <td>0.191489</td>\n      <td>0.175781</td>\n      <td>0.150000</td>\n      <td>0.189091</td>\n      <td>0.191011</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>1097</th>\n      <td>0</td>\n      <td>0.108108</td>\n      <td>0.15629</td>\n      <td>0.202417</td>\n      <td>0.230769</td>\n      <td>0.156146</td>\n      <td>0.212598</td>\n      <td>0.139053</td>\n      <td>0.168498</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.065574</td>\n      <td>0.130435</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.358209</td>\n      <td>0.288043</td>\n      <td>0.191011</td>\n      <td>0.319797</td>\n    </tr>\n    <tr>\n      <th>1098</th>\n  
    <td>1</td>\n      <td>0.416667</td>\n      <td>0.15629</td>\n      <td>0.140303</td>\n      <td>0.090909</td>\n      <td>0.214286</td>\n      <td>0.151515</td>\n      <td>0.139053</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.204545</td>\n      <td>0.251586</td>\n      <td>0.522388</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.358209</td>\n      <td>0.288043</td>\n      <td>0.191011</td>\n      <td>0.319797</td>\n    </tr>\n    <tr>\n      <th>1099</th>\n      <td>1</td>\n      <td>0.322581</td>\n      <td>0.22439</td>\n      <td>0.140303</td>\n      <td>0.144654</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.152074</td>\n      <td>0.085202</td>\n      <td>0.160920</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.144330</td>\n      <td>0.189091</td>\n      <td>0.149606</td>\n      <td>0.138462</td>\n    </tr>\n  </tbody>\n</table>\n<p>1100 rows × 27 columns</p>\n</div>"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T02:53:49.423942400Z",
     "start_time": "2025-06-07T02:53:49.403078100Z"
    }
   },
   "id": "ee1b4b3ccf2b3169"
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0       0\n",
      "1       0\n",
      "2       1\n",
      "3       0\n",
      "4       1\n",
      "       ..\n",
      "1095    0\n",
      "1096    0\n",
      "1097    0\n",
      "1098    1\n",
      "1099    1\n",
      "Name: Attrition, Length: 1100, dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": "0.9583333333333333"
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import roc_auc_score\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "x = df3.drop('Attrition',axis='columns')\n",
    "y = df3['Attrition']\n",
    "print(y)\n",
    "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=25)\n",
    "es = LogisticRegression(random_state=25)\n",
    "es.fit(x_train,y_train)\n",
    "y_pre=es.predict(x_test)\n",
    "roc_auc_score(y_test,y_pre)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T03:16:05.185061300Z",
     "start_time": "2025-06-07T03:16:05.171360600Z"
    }
   },
   "id": "fe5b43e8f063b5a2"
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [],
   "source": [
    "# 获取测试集数据\n",
    "tf = pd.read_csv('../data/test2.csv')\n",
    "tf.drop(['Over18','StandardHours','EmployeeNumber','Gender'],axis='columns',inplace=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T02:59:51.728224900Z",
     "start_time": "2025-06-07T02:59:51.718980100Z"
    }
   },
   "id": "5651a90266909def"
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [
    {
     "data": {
      "text/plain": "    Unnamed: 0  Age   0   1     ratio\n0            0   18   2   0  0.000000\n1            1   19   3   5  0.625000\n2            2   20   3   3  0.500000\n3            3   21   2   5  0.714286\n4            4   22   7   5  0.416667\n5            5   23   6   4  0.400000\n6            6   24  14   4  0.222222\n7            7   25  15   5  0.250000\n8            8   26  21  10  0.322581\n9            9   27  36   2  0.052632\n10          10   28  25  10  0.285714\n11          11   29  40  15  0.272727\n12          12   30  39   9  0.187500\n13          13   31  35  13  0.270833\n14          14   32  39   8  0.170213\n15          15   33  35  12  0.255319\n16          16   34  46   7  0.132075\n17          17   35  52   7  0.118644\n18          18   36  51   4  0.072727\n19          19   37  33   4  0.108108\n20          20   38  37   2  0.051282\n21          21   39  25   5  0.166667\n22          22   40  44   3  0.063830\n23          23   41  26   5  0.161290\n24          24   42  32   2  0.058824\n25          25   43  24   1  0.040000\n26          26   44  18   4  0.181818\n27          27   45  28   2  0.066667\n28          28   46  22   3  0.120000\n29          29   47  14   2  0.125000\n30          30   48  13   0  0.000000\n31          31   49  20   2  0.090909\n32          32   50  22   1  0.043478\n33          33   51  14   2  0.125000\n34          34   52  10   2  0.166667\n35          35   53  11   2  0.153846\n36          36   54  15   0  0.000000\n37          37   55  14   3  0.176471\n38          38   56  11   2  0.153846\n39          39   57   2   0  0.000000\n40          40   58   4   3  0.428571\n41          41   59   8   0  0.000000\n42          42   60   4   0  0.000000",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Unnamed: 0</th>\n      <th>Age</th>\n      <th>0</th>\n      <th>1</th>\n      <th>ratio</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>18</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>19</td>\n      <td>3</td>\n      <td>5</td>\n      <td>0.625000</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2</td>\n      <td>20</td>\n      <td>3</td>\n      <td>3</td>\n      <td>0.500000</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>3</td>\n      <td>21</td>\n      <td>2</td>\n      <td>5</td>\n      <td>0.714286</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>4</td>\n      <td>22</td>\n      <td>7</td>\n      <td>5</td>\n      <td>0.416667</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>5</td>\n      <td>23</td>\n      <td>6</td>\n      <td>4</td>\n      <td>0.400000</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>6</td>\n      <td>24</td>\n      <td>14</td>\n      <td>4</td>\n      <td>0.222222</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>7</td>\n      <td>25</td>\n      <td>15</td>\n      <td>5</td>\n      <td>0.250000</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>8</td>\n      <td>26</td>\n      <td>21</td>\n      <td>10</td>\n      <td>0.322581</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>9</td>\n      <td>27</td>\n      <td>36</td>\n      <td>2</td>\n      <td>0.052632</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>10</td>\n      <td>28</td>\n      <td>25</td>\n      <td>10</td>\n   
   <td>0.285714</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>11</td>\n      <td>29</td>\n      <td>40</td>\n      <td>15</td>\n      <td>0.272727</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>12</td>\n      <td>30</td>\n      <td>39</td>\n      <td>9</td>\n      <td>0.187500</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>13</td>\n      <td>31</td>\n      <td>35</td>\n      <td>13</td>\n      <td>0.270833</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>14</td>\n      <td>32</td>\n      <td>39</td>\n      <td>8</td>\n      <td>0.170213</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>15</td>\n      <td>33</td>\n      <td>35</td>\n      <td>12</td>\n      <td>0.255319</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>16</td>\n      <td>34</td>\n      <td>46</td>\n      <td>7</td>\n      <td>0.132075</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>17</td>\n      <td>35</td>\n      <td>52</td>\n      <td>7</td>\n      <td>0.118644</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>18</td>\n      <td>36</td>\n      <td>51</td>\n      <td>4</td>\n      <td>0.072727</td>\n    </tr>\n    <tr>\n      <th>19</th>\n      <td>19</td>\n      <td>37</td>\n      <td>33</td>\n      <td>4</td>\n      <td>0.108108</td>\n    </tr>\n    <tr>\n      <th>20</th>\n      <td>20</td>\n      <td>38</td>\n      <td>37</td>\n      <td>2</td>\n      <td>0.051282</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>21</td>\n      <td>39</td>\n      <td>25</td>\n      <td>5</td>\n      <td>0.166667</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>22</td>\n      <td>40</td>\n      <td>44</td>\n      <td>3</td>\n      <td>0.063830</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>23</td>\n      <td>41</td>\n      <td>26</td>\n      <td>5</td>\n      <td>0.161290</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>24</td>\n      <td>42</td>\n      <td>32</td>\n      <td>2</td>\n      <td>0.058824</td>\n    </tr>\n    
<tr>\n      <th>25</th>\n      <td>25</td>\n      <td>43</td>\n      <td>24</td>\n      <td>1</td>\n      <td>0.040000</td>\n    </tr>\n    <tr>\n      <th>26</th>\n      <td>26</td>\n      <td>44</td>\n      <td>18</td>\n      <td>4</td>\n      <td>0.181818</td>\n    </tr>\n    <tr>\n      <th>27</th>\n      <td>27</td>\n      <td>45</td>\n      <td>28</td>\n      <td>2</td>\n      <td>0.066667</td>\n    </tr>\n    <tr>\n      <th>28</th>\n      <td>28</td>\n      <td>46</td>\n      <td>22</td>\n      <td>3</td>\n      <td>0.120000</td>\n    </tr>\n    <tr>\n      <th>29</th>\n      <td>29</td>\n      <td>47</td>\n      <td>14</td>\n      <td>2</td>\n      <td>0.125000</td>\n    </tr>\n    <tr>\n      <th>30</th>\n      <td>30</td>\n      <td>48</td>\n      <td>13</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>31</th>\n      <td>31</td>\n      <td>49</td>\n      <td>20</td>\n      <td>2</td>\n      <td>0.090909</td>\n    </tr>\n    <tr>\n      <th>32</th>\n      <td>32</td>\n      <td>50</td>\n      <td>22</td>\n      <td>1</td>\n      <td>0.043478</td>\n    </tr>\n    <tr>\n      <th>33</th>\n      <td>33</td>\n      <td>51</td>\n      <td>14</td>\n      <td>2</td>\n      <td>0.125000</td>\n    </tr>\n    <tr>\n      <th>34</th>\n      <td>34</td>\n      <td>52</td>\n      <td>10</td>\n      <td>2</td>\n      <td>0.166667</td>\n    </tr>\n    <tr>\n      <th>35</th>\n      <td>35</td>\n      <td>53</td>\n      <td>11</td>\n      <td>2</td>\n      <td>0.153846</td>\n    </tr>\n    <tr>\n      <th>36</th>\n      <td>36</td>\n      <td>54</td>\n      <td>15</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>37</td>\n      <td>55</td>\n      <td>14</td>\n      <td>3</td>\n      <td>0.176471</td>\n    </tr>\n    <tr>\n      <th>38</th>\n      <td>38</td>\n      <td>56</td>\n      <td>11</td>\n      <td>2</td>\n      <td>0.153846</td>\n    </tr>\n    <tr>\n      <th>39</th>\n      
<td>39</td>\n      <td>57</td>\n      <td>2</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>40</th>\n      <td>40</td>\n      <td>58</td>\n      <td>4</td>\n      <td>3</td>\n      <td>0.428571</td>\n    </tr>\n    <tr>\n      <th>41</th>\n      <td>41</td>\n      <td>59</td>\n      <td>8</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>42</th>\n      <td>42</td>\n      <td>60</td>\n      <td>4</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tf_weight = pd.read_excel('../data/ana_data.xlsx',sheet_name=\"Age\")\n",
    "tf_weight"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T03:12:35.909038100Z",
     "start_time": "2025-06-07T03:12:35.881546900Z"
    }
   },
   "id": "9479627492929d57"
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "outputs": [
    {
     "data": {
      "text/plain": "    Attrition       Age  BusinessTravel  Department  DistanceFromHome  \\\n0           0  0.170213        0.156290    0.140303          0.118056   \n1           0  0.118644        0.156290    0.140303          0.166667   \n2           1  0.072727        0.156290    0.140303          0.230769   \n3           0  0.051282        0.156290    0.202417          0.090909   \n4           0  0.270833        0.156290    0.140303          0.090909   \n5           0  0.118644        0.156290    0.202417          0.090909   \n6           0  0.132075        0.156290    0.140303          0.114754   \n7           0  0.285714        0.156290    0.140303          0.189655   \n8           0  0.255319        0.156290    0.140303          0.144654   \n9           0  0.222222        0.083333    0.140303          0.142857   \n10          1  0.285714        0.224390    0.202417          0.118056   \n11          0  0.108108        0.156290    0.140303          0.122449   \n12          0  0.072727        0.156290    0.214286          0.114754   \n13          0  0.400000        0.156290    0.140303          0.147059   \n14          0  0.051282        0.156290    0.202417          0.118056   \n15          0  0.270833        0.156290    0.140303          0.210526   \n16          1  0.500000        0.156290    0.202417          0.144654   \n17          1  0.222222        0.156290    0.140303          0.118056   \n18          0  0.222222        0.224390    0.140303          0.144654   \n19          0  0.072727        0.224390    0.140303          0.127660   \n20          1  0.108108        0.156290    0.202417          0.118056   \n21          0  0.170213        0.156290    0.202417          0.114754   \n22          0  0.118644        0.156290    0.202417          0.142857   \n23          0  0.714286        0.156290    0.140303          0.118056   \n24          0  0.161290        0.156290    0.140303          0.118056   \n25          0  0.322581        0.156290    0.140303   
       0.144654   \n26          0  0.063830        0.156290    0.202417          0.189655   \n27          0  0.000000        0.083333    0.214286          0.105263   \n28          1  0.285714        0.156290    0.140303          0.200000   \n29          0  0.051282        0.156290    0.202417          0.147059   \n30          1  0.285714        0.156290    0.140303          0.400000   \n31          0  0.285714        0.156290    0.214286          0.114754   \n32          1  0.120000        0.156290    0.202417          0.189655   \n33          1  0.500000        0.156290    0.140303          0.142857   \n34          0  0.066667        0.156290    0.140303          0.217391   \n35          0  0.272727        0.156290    0.140303          0.100000   \n36          0  0.181818        0.083333    0.140303          0.200000   \n37          0  0.058824        0.083333    0.214286          0.144654   \n38          0  0.072727        0.156290    0.140303          0.144654   \n39          0  0.322581        0.156290    0.214286          0.263158   \n40          0  0.285714        0.156290    0.202417          0.144654   \n41          1  0.222222        0.224390    0.140303          0.090909   \n\n    Education  EducationField  EnvironmentSatisfaction  JobInvolvement  \\\n0    0.214286        0.151515                 0.139053        0.146747   \n1    0.214286        0.151515                 0.139053        0.146747   \n2    0.156146        0.151515                 0.133531        0.106796   \n3    0.145631        0.136499                 0.246512        0.106796   \n4    0.145631        0.136499                 0.157143        0.146747   \n5    0.156146        0.151515                 0.133531        0.146747   \n6    0.145631        0.136499                 0.157143        0.146747   \n7    0.167053        0.136499                 0.133531        0.146747   \n8    0.167053        0.151515                 0.133531        0.146747   \n9    0.214286        0.151515              
   0.246512        0.168498   \n10   0.167053        0.239130                 0.246512        0.146747   \n11   0.145631        0.136499                 0.139053        0.146747   \n12   0.167053        0.151515                 0.246512        0.168498   \n13   0.167053        0.239130                 0.139053        0.106796   \n14   0.156146        0.151515                 0.139053        0.146747   \n15   0.167053        0.151515                 0.157143        0.146747   \n16   0.167053        0.136499                 0.133531        0.168498   \n17   0.167053        0.136499                 0.157143        0.146747   \n18   0.214286        0.239130                 0.246512        0.146747   \n19   0.156146        0.151515                 0.246512        0.106796   \n20   0.156146        0.151515                 0.246512        0.168498   \n21   0.145631        0.239130                 0.133531        0.168498   \n22   0.156146        0.136499                 0.157143        0.146747   \n23   0.214286        0.239130                 0.139053        0.168498   \n24   0.167053        0.151515                 0.139053        0.146747   \n25   0.214286        0.136499                 0.246512        0.168498   \n26   0.145631        0.136499                 0.246512        0.146747   \n27   0.167053        0.315789                 0.139053        0.106796   \n28   0.167053        0.239130                 0.133531        0.168498   \n29   0.145631        0.151515                 0.246512        0.146747   \n30   0.167053        0.151515                 0.133531        0.168498   \n31   0.145631        0.136499                 0.157143        0.168498   \n32   0.167053        0.212598                 0.246512        0.146747   \n33   0.167053        0.239130                 0.246512        0.146747   \n34   0.167053        0.136499                 0.133531        0.146747   \n35   0.167053        0.151515                 0.133531        0.146747   \n36   0.167053     
   0.136499                 0.157143        0.146747   \n37   0.055556        0.136499                 0.139053        0.106796   \n38   0.156146        0.151515                 0.133531        0.146747   \n39   0.214286        0.151515                 0.133531        0.146747   \n40   0.156146        0.212598                 0.157143        0.146747   \n41   0.167053        0.151515                 0.246512        0.146747   \n\n    JobLevel  ...  PerformanceRating  RelationshipSatisfaction  \\\n0   0.259709  ...           0.154506                  0.142415   \n1   0.107769  ...           0.202381                  0.142415   \n2   0.259709  ...           0.154506                  0.158824   \n3   0.107769  ...           0.202381                  0.158824   \n4   0.107769  ...           0.154506                  0.152074   \n5   0.107769  ...           0.154506                  0.142415   \n6   0.107769  ...           0.154506                  0.142415   \n7   0.259709  ...           0.154506                  0.152074   \n8   0.259709  ...           0.154506                  0.204545   \n9   0.259709  ...           0.154506                  0.142415   \n10  0.259709  ...           0.154506                  0.142415   \n11  0.107769  ...           0.154506                  0.158824   \n12  0.259709  ...           0.202381                  0.158824   \n13  0.259709  ...           0.154506                  0.142415   \n14  0.107769  ...           0.154506                  0.204545   \n15  0.107769  ...           0.154506                  0.204545   \n16  0.259709  ...           0.154506                  0.142415   \n17  0.259709  ...           0.154506                  0.204545   \n18  0.259709  ...           0.154506                  0.158824   \n19  0.107769  ...           0.154506                  0.142415   \n20  0.127389  ...           0.154506                  0.158824   \n21  0.107769  ...           0.154506                  0.142415   \n22  0.107769  ...       
    0.154506                  0.142415   \n23  0.259709  ...           0.154506                  0.158824   \n24  0.259709  ...           0.202381                  0.204545   \n25  0.259709  ...           0.154506                  0.142415   \n26  0.107769  ...           0.154506                  0.142415   \n27  0.037037  ...           0.154506                  0.158824   \n28  0.259709  ...           0.154506                  0.204545   \n29  0.107769  ...           0.154506                  0.158824   \n30  0.259709  ...           0.154506                  0.152074   \n31  0.259709  ...           0.154506                  0.142415   \n32  0.127389  ...           0.154506                  0.204545   \n33  0.259709  ...           0.154506                  0.152074   \n34  0.259709  ...           0.154506                  0.152074   \n35  0.259709  ...           0.154506                  0.204545   \n36  0.107769  ...           0.154506                  0.158824   \n37  0.107769  ...           0.154506                  0.204545   \n38  0.107769  ...           0.154506                  0.142415   \n39  0.259709  ...           0.202381                  0.142415   \n40  0.107769  ...           0.202381                  0.204545   \n41  0.259709  ...           
0.154506                  0.142415   \n\n    StockOptionLevel  TotalWorkingYears  TrainingTimesLastYear  \\\n0           0.251586           0.522388               0.181818   \n1           0.085202           0.112500               0.181818   \n2           0.251586           0.111111               0.100000   \n3           0.085202           0.175000               0.145119   \n4           0.085202           0.125000               0.123596   \n5           0.251586           0.112500               0.181818   \n6           0.251586           0.112500               0.181818   \n7           0.085202           0.160920               0.181818   \n8           0.251586           0.183673               0.181818   \n9           0.251586           0.160920               0.181818   \n10          0.085202           0.161765               0.145119   \n11          0.065574           0.175000               0.181818   \n12          0.251586           0.160920               0.145119   \n13          0.085202           0.183673               0.181818   \n14          0.065574           0.037037               0.145119   \n15          0.085202           0.112500               0.181818   \n16          0.251586           0.285714               0.145119   \n17          0.085202           0.160920               0.181818   \n18          0.251586           0.160920               0.181818   \n19          0.085202           0.130435               0.145119   \n20          0.251586           0.047619               0.181818   \n21          0.251586           0.125000               0.181818   \n22          0.085202           0.160920               0.181818   \n23          0.251586           0.285714               0.125000   \n24          0.085202           0.142857               0.145119   \n25          0.251586           0.161765               0.181818   \n26          0.251586           0.130435               0.123596   \n27          0.251586           0.058824               0.145119   \n28          
0.085202           0.160920               0.181818   \n29          0.085202           0.112500               0.100000   \n30          0.251586           0.160920               0.191489   \n31          0.085202           0.160920               0.125000   \n32          0.085202           0.000000               0.100000   \n33          0.251586           0.522388               0.181818   \n34          0.251586           0.130435               0.181818   \n35          0.251586           0.160920               0.100000   \n36          0.085202           0.047619               0.145119   \n37          0.085202           0.112500               0.145119   \n38          0.251586           0.111111               0.181818   \n39          0.085202           0.175000               0.145119   \n40          0.251586           0.175000               0.181818   \n41          0.085202           0.160920               0.191489   \n\n    WorkLifeBalance  YearsAtCompany  YearsInCurrentRole  \\\n0          0.141593        0.358209            0.288043   \n1          0.269841        0.152174            0.038462   \n2          0.141593        0.000000            0.000000   \n3          0.141593        0.101695            0.288043   \n4          0.269841        0.133333            0.066667   \n5          0.141593        0.152174            0.072464   \n6          0.141593        0.132450            0.219512   \n7          0.141593        0.212766            0.189091   \n8          0.194175        0.144330            0.219512   \n9          0.175781        0.132450            0.189091   \n10         0.194175        0.144330            0.189091   \n11         0.175781        0.153846            0.189091   \n12         0.141593        0.132450            0.168675   \n13         0.141593        0.212766            0.189091   \n14         0.141593        0.000000            0.000000   \n15         0.141593        0.152174            0.072464   \n16         0.175781        0.212766            
0.189091   \n17         0.175781        0.212766            0.288043   \n18         0.141593        0.153846            0.121495   \n19         0.141593        0.144330            0.189091   \n20         0.269841        0.000000            0.219512   \n21         0.141593        0.133333            0.125000   \n22         0.269841        0.132450            0.121495   \n23         0.194175        0.212766            0.189091   \n24         0.141593        0.132450            0.121495   \n25         0.141593        0.150000            0.121495   \n26         0.194175        0.101695            0.168675   \n27         0.141593        0.132450            0.121495   \n28         0.175781        0.150000            0.219512   \n29         0.141593        0.152174            0.133333   \n30         0.141593        0.150000            0.189091   \n31         0.141593        0.132450            0.219512   \n32         0.194175        0.128571            0.133333   \n33         0.141593        0.358209            0.288043   \n34         0.175781        0.153846            0.000000   \n35         0.141593        0.153846            0.000000   \n36         0.194175        0.144330            0.189091   \n37         0.194175        0.150000            0.121495   \n38         0.141593        0.125000            0.125000   \n39         0.141593        0.101695            0.133333   \n40         0.141593        0.101695            0.133333   \n41         0.141593        0.153846            0.121495   \n\n    YearsSinceLastPromotion  YearsWithCurrManager  \n0                  0.191011              0.319797  \n1                  0.191011              0.147239  \n2                  0.090909              0.250000  \n3                  0.206897              0.147239  \n4                  0.175000              0.000000  \n5                  0.191011              0.130435  \n6                  0.069767              0.134615  \n7                  0.162602              0.138462  \n8       
           0.191011              0.138462  \n9                  0.175000              0.150685  \n10                 0.149606              0.138462  \n11                 0.191011              0.150685  \n12                 0.191011              0.134615  \n13                 0.162602              0.138462  \n14                 0.083333              0.083333  \n15                 0.191011              0.138462  \n16                 0.191011              0.138462  \n17                 0.162602              0.319797  \n18                 0.149606              0.134615  \n19                 0.191011              0.138462  \n20                 0.000000              0.147239  \n21                 0.000000              0.130435  \n22                 0.191011              0.150685  \n23                 0.162602              0.138462  \n24                 0.149606              0.319797  \n25                 0.149606              0.169492  \n26                 0.206897              0.169492  \n27                 0.069767              0.150685  \n28                 0.191011              0.134615  \n29                 0.149606              0.130435  \n30                 0.191011              0.134615  \n31                 0.191011              0.150685  \n32                 0.069767              0.134615  \n33                 0.191011              0.319797  \n34                 0.191011              0.134615  \n35                 0.149606              0.100000  \n36                 0.149606              0.138462  \n37                 0.191011              0.134615  \n38                 0.083333              0.000000  \n39                 0.083333              0.147239  \n40                 0.206897              0.100000  \n41                 0.149606              0.138462  \n\n[42 rows x 27 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Attrition</th>\n      <th>Age</th>\n      <th>BusinessTravel</th>\n      <th>Department</th>\n      <th>DistanceFromHome</th>\n      <th>Education</th>\n      <th>EducationField</th>\n      <th>EnvironmentSatisfaction</th>\n      <th>JobInvolvement</th>\n      <th>JobLevel</th>\n      <th>...</th>\n      <th>PerformanceRating</th>\n      <th>RelationshipSatisfaction</th>\n      <th>StockOptionLevel</th>\n      <th>TotalWorkingYears</th>\n      <th>TrainingTimesLastYear</th>\n      <th>WorkLifeBalance</th>\n      <th>YearsAtCompany</th>\n      <th>YearsInCurrentRole</th>\n      <th>YearsSinceLastPromotion</th>\n      <th>YearsWithCurrManager</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>0.170213</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.214286</td>\n      <td>0.151515</td>\n      <td>0.139053</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.522388</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.358209</td>\n      <td>0.288043</td>\n      <td>0.191011</td>\n      <td>0.319797</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>0</td>\n      <td>0.118644</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.166667</td>\n      <td>0.214286</td>\n      <td>0.151515</td>\n      <td>0.139053</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n     
 <td>0.112500</td>\n      <td>0.181818</td>\n      <td>0.269841</td>\n      <td>0.152174</td>\n      <td>0.038462</td>\n      <td>0.191011</td>\n      <td>0.147239</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>0.072727</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.230769</td>\n      <td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.133531</td>\n      <td>0.106796</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.251586</td>\n      <td>0.111111</td>\n      <td>0.100000</td>\n      <td>0.141593</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>0.090909</td>\n      <td>0.250000</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>0</td>\n      <td>0.051282</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.090909</td>\n      <td>0.145631</td>\n      <td>0.136499</td>\n      <td>0.246512</td>\n      <td>0.106796</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.158824</td>\n      <td>0.085202</td>\n      <td>0.175000</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.101695</td>\n      <td>0.288043</td>\n      <td>0.206897</td>\n      <td>0.147239</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>0</td>\n      <td>0.270833</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.090909</td>\n      <td>0.145631</td>\n      <td>0.136499</td>\n      <td>0.157143</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.152074</td>\n      <td>0.085202</td>\n      <td>0.125000</td>\n      <td>0.123596</td>\n      <td>0.269841</td>\n      <td>0.133333</td>\n      <td>0.066667</td>\n      <td>0.175000</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>0</td>\n      <td>0.118644</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.090909</td>\n      
<td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.112500</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.152174</td>\n      <td>0.072464</td>\n      <td>0.191011</td>\n      <td>0.130435</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>0</td>\n      <td>0.132075</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.114754</td>\n      <td>0.145631</td>\n      <td>0.136499</td>\n      <td>0.157143</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.112500</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.132450</td>\n      <td>0.219512</td>\n      <td>0.069767</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>0</td>\n      <td>0.285714</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.189655</td>\n      <td>0.167053</td>\n      <td>0.136499</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.152074</td>\n      <td>0.085202</td>\n      <td>0.160920</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.212766</td>\n      <td>0.189091</td>\n      <td>0.162602</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>0</td>\n      <td>0.255319</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.144654</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.251586</td>\n      <td>0.183673</td>\n      <td>0.181818</td>\n      <td>0.194175</td>\n      <td>0.144330</td>\n      <td>0.219512</td>\n     
 <td>0.191011</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>0</td>\n      <td>0.222222</td>\n      <td>0.083333</td>\n      <td>0.140303</td>\n      <td>0.142857</td>\n      <td>0.214286</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.160920</td>\n      <td>0.181818</td>\n      <td>0.175781</td>\n      <td>0.132450</td>\n      <td>0.189091</td>\n      <td>0.175000</td>\n      <td>0.150685</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>1</td>\n      <td>0.285714</td>\n      <td>0.224390</td>\n      <td>0.202417</td>\n      <td>0.118056</td>\n      <td>0.167053</td>\n      <td>0.239130</td>\n      <td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n      <td>0.161765</td>\n      <td>0.145119</td>\n      <td>0.194175</td>\n      <td>0.144330</td>\n      <td>0.189091</td>\n      <td>0.149606</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>0</td>\n      <td>0.108108</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.122449</td>\n      <td>0.145631</td>\n      <td>0.136499</td>\n      <td>0.139053</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.065574</td>\n      <td>0.175000</td>\n      <td>0.181818</td>\n      <td>0.175781</td>\n      <td>0.153846</td>\n      <td>0.189091</td>\n      <td>0.191011</td>\n      <td>0.150685</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>0</td>\n      <td>0.072727</td>\n      <td>0.156290</td>\n      <td>0.214286</td>\n      <td>0.114754</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      
<td>...</td>\n      <td>0.202381</td>\n      <td>0.158824</td>\n      <td>0.251586</td>\n      <td>0.160920</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.132450</td>\n      <td>0.168675</td>\n      <td>0.191011</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>13</th>\n      <td>0</td>\n      <td>0.400000</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.147059</td>\n      <td>0.167053</td>\n      <td>0.239130</td>\n      <td>0.139053</td>\n      <td>0.106796</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n      <td>0.183673</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.212766</td>\n      <td>0.189091</td>\n      <td>0.162602</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>0</td>\n      <td>0.051282</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.118056</td>\n      <td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.139053</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.065574</td>\n      <td>0.037037</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>0.083333</td>\n      <td>0.083333</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>0</td>\n      <td>0.270833</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.210526</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.157143</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.085202</td>\n      <td>0.112500</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.152174</td>\n      <td>0.072464</td>\n      <td>0.191011</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>1</td>\n      
<td>0.500000</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.144654</td>\n      <td>0.167053</td>\n      <td>0.136499</td>\n      <td>0.133531</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.285714</td>\n      <td>0.145119</td>\n      <td>0.175781</td>\n      <td>0.212766</td>\n      <td>0.189091</td>\n      <td>0.191011</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>1</td>\n      <td>0.222222</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.167053</td>\n      <td>0.136499</td>\n      <td>0.157143</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.085202</td>\n      <td>0.160920</td>\n      <td>0.181818</td>\n      <td>0.175781</td>\n      <td>0.212766</td>\n      <td>0.288043</td>\n      <td>0.162602</td>\n      <td>0.319797</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>0</td>\n      <td>0.222222</td>\n      <td>0.224390</td>\n      <td>0.140303</td>\n      <td>0.144654</td>\n      <td>0.214286</td>\n      <td>0.239130</td>\n      <td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.251586</td>\n      <td>0.160920</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.153846</td>\n      <td>0.121495</td>\n      <td>0.149606</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>19</th>\n      <td>0</td>\n      <td>0.072727</td>\n      <td>0.224390</td>\n      <td>0.140303</td>\n      <td>0.127660</td>\n      <td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.106796</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n      <td>0.130435</td>\n  
    <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.144330</td>\n      <td>0.189091</td>\n      <td>0.191011</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>20</th>\n      <td>1</td>\n      <td>0.108108</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.118056</td>\n      <td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.168498</td>\n      <td>0.127389</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.251586</td>\n      <td>0.047619</td>\n      <td>0.181818</td>\n      <td>0.269841</td>\n      <td>0.000000</td>\n      <td>0.219512</td>\n      <td>0.000000</td>\n      <td>0.147239</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>0</td>\n      <td>0.170213</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.114754</td>\n      <td>0.145631</td>\n      <td>0.239130</td>\n      <td>0.133531</td>\n      <td>0.168498</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.125000</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.133333</td>\n      <td>0.125000</td>\n      <td>0.000000</td>\n      <td>0.130435</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>0</td>\n      <td>0.118644</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.142857</td>\n      <td>0.156146</td>\n      <td>0.136499</td>\n      <td>0.157143</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n      <td>0.160920</td>\n      <td>0.181818</td>\n      <td>0.269841</td>\n      <td>0.132450</td>\n      <td>0.121495</td>\n      <td>0.191011</td>\n      <td>0.150685</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>0</td>\n      <td>0.714286</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.214286</td>\n      
<td>0.239130</td>\n      <td>0.139053</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.251586</td>\n      <td>0.285714</td>\n      <td>0.125000</td>\n      <td>0.194175</td>\n      <td>0.212766</td>\n      <td>0.189091</td>\n      <td>0.162602</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>0</td>\n      <td>0.161290</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.118056</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.139053</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.204545</td>\n      <td>0.085202</td>\n      <td>0.142857</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.132450</td>\n      <td>0.121495</td>\n      <td>0.149606</td>\n      <td>0.319797</td>\n    </tr>\n    <tr>\n      <th>25</th>\n      <td>0</td>\n      <td>0.322581</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.144654</td>\n      <td>0.214286</td>\n      <td>0.136499</td>\n      <td>0.246512</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.161765</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.150000</td>\n      <td>0.121495</td>\n      <td>0.149606</td>\n      <td>0.169492</td>\n    </tr>\n    <tr>\n      <th>26</th>\n      <td>0</td>\n      <td>0.063830</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.189655</td>\n      <td>0.145631</td>\n      <td>0.136499</td>\n      <td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.130435</td>\n      <td>0.123596</td>\n      <td>0.194175</td>\n      <td>0.101695</td>\n      <td>0.168675</td>\n      <td>0.206897</td>\n  
    <td>0.169492</td>\n    </tr>\n    <tr>\n      <th>27</th>\n      <td>0</td>\n      <td>0.000000</td>\n      <td>0.083333</td>\n      <td>0.214286</td>\n      <td>0.105263</td>\n      <td>0.167053</td>\n      <td>0.315789</td>\n      <td>0.139053</td>\n      <td>0.106796</td>\n      <td>0.037037</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.251586</td>\n      <td>0.058824</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.132450</td>\n      <td>0.121495</td>\n      <td>0.069767</td>\n      <td>0.150685</td>\n    </tr>\n    <tr>\n      <th>28</th>\n      <td>1</td>\n      <td>0.285714</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.200000</td>\n      <td>0.167053</td>\n      <td>0.239130</td>\n      <td>0.133531</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.085202</td>\n      <td>0.160920</td>\n      <td>0.181818</td>\n      <td>0.175781</td>\n      <td>0.150000</td>\n      <td>0.219512</td>\n      <td>0.191011</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>29</th>\n      <td>0</td>\n      <td>0.051282</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.147059</td>\n      <td>0.145631</td>\n      <td>0.151515</td>\n      <td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.085202</td>\n      <td>0.112500</td>\n      <td>0.100000</td>\n      <td>0.141593</td>\n      <td>0.152174</td>\n      <td>0.133333</td>\n      <td>0.149606</td>\n      <td>0.130435</td>\n    </tr>\n    <tr>\n      <th>30</th>\n      <td>1</td>\n      <td>0.285714</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.400000</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.133531</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      
<td>0.154506</td>\n      <td>0.152074</td>\n      <td>0.251586</td>\n      <td>0.160920</td>\n      <td>0.191489</td>\n      <td>0.141593</td>\n      <td>0.150000</td>\n      <td>0.189091</td>\n      <td>0.191011</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>31</th>\n      <td>0</td>\n      <td>0.285714</td>\n      <td>0.156290</td>\n      <td>0.214286</td>\n      <td>0.114754</td>\n      <td>0.145631</td>\n      <td>0.136499</td>\n      <td>0.157143</td>\n      <td>0.168498</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n      <td>0.160920</td>\n      <td>0.125000</td>\n      <td>0.141593</td>\n      <td>0.132450</td>\n      <td>0.219512</td>\n      <td>0.191011</td>\n      <td>0.150685</td>\n    </tr>\n    <tr>\n      <th>32</th>\n      <td>1</td>\n      <td>0.120000</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.189655</td>\n      <td>0.167053</td>\n      <td>0.212598</td>\n      <td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.127389</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.085202</td>\n      <td>0.000000</td>\n      <td>0.100000</td>\n      <td>0.194175</td>\n      <td>0.128571</td>\n      <td>0.133333</td>\n      <td>0.069767</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>33</th>\n      <td>1</td>\n      <td>0.500000</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.142857</td>\n      <td>0.167053</td>\n      <td>0.239130</td>\n      <td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.152074</td>\n      <td>0.251586</td>\n      <td>0.522388</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.358209</td>\n      <td>0.288043</td>\n      <td>0.191011</td>\n      <td>0.319797</td>\n    </tr>\n    <tr>\n      <th>34</th>\n      <td>0</td>\n      <td>0.066667</td>\n      
<td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.217391</td>\n      <td>0.167053</td>\n      <td>0.136499</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.152074</td>\n      <td>0.251586</td>\n      <td>0.130435</td>\n      <td>0.181818</td>\n      <td>0.175781</td>\n      <td>0.153846</td>\n      <td>0.000000</td>\n      <td>0.191011</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>35</th>\n      <td>0</td>\n      <td>0.272727</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.100000</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.251586</td>\n      <td>0.160920</td>\n      <td>0.100000</td>\n      <td>0.141593</td>\n      <td>0.153846</td>\n      <td>0.000000</td>\n      <td>0.149606</td>\n      <td>0.100000</td>\n    </tr>\n    <tr>\n      <th>36</th>\n      <td>0</td>\n      <td>0.181818</td>\n      <td>0.083333</td>\n      <td>0.140303</td>\n      <td>0.200000</td>\n      <td>0.167053</td>\n      <td>0.136499</td>\n      <td>0.157143</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.158824</td>\n      <td>0.085202</td>\n      <td>0.047619</td>\n      <td>0.145119</td>\n      <td>0.194175</td>\n      <td>0.144330</td>\n      <td>0.189091</td>\n      <td>0.149606</td>\n      <td>0.138462</td>\n    </tr>\n    <tr>\n      <th>37</th>\n      <td>0</td>\n      <td>0.058824</td>\n      <td>0.083333</td>\n      <td>0.214286</td>\n      <td>0.144654</td>\n      <td>0.055556</td>\n      <td>0.136499</td>\n      <td>0.139053</td>\n      <td>0.106796</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.204545</td>\n      <td>0.085202</td>\n      <td>0.112500</td>\n      <td>0.145119</td>\n  
    <td>0.194175</td>\n      <td>0.150000</td>\n      <td>0.121495</td>\n      <td>0.191011</td>\n      <td>0.134615</td>\n    </tr>\n    <tr>\n      <th>38</th>\n      <td>0</td>\n      <td>0.072727</td>\n      <td>0.156290</td>\n      <td>0.140303</td>\n      <td>0.144654</td>\n      <td>0.156146</td>\n      <td>0.151515</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.251586</td>\n      <td>0.111111</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.125000</td>\n      <td>0.125000</td>\n      <td>0.083333</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>39</th>\n      <td>0</td>\n      <td>0.322581</td>\n      <td>0.156290</td>\n      <td>0.214286</td>\n      <td>0.263158</td>\n      <td>0.214286</td>\n      <td>0.151515</td>\n      <td>0.133531</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n      <td>0.175000</td>\n      <td>0.145119</td>\n      <td>0.141593</td>\n      <td>0.101695</td>\n      <td>0.133333</td>\n      <td>0.083333</td>\n      <td>0.147239</td>\n    </tr>\n    <tr>\n      <th>40</th>\n      <td>0</td>\n      <td>0.285714</td>\n      <td>0.156290</td>\n      <td>0.202417</td>\n      <td>0.144654</td>\n      <td>0.156146</td>\n      <td>0.212598</td>\n      <td>0.157143</td>\n      <td>0.146747</td>\n      <td>0.107769</td>\n      <td>...</td>\n      <td>0.202381</td>\n      <td>0.204545</td>\n      <td>0.251586</td>\n      <td>0.175000</td>\n      <td>0.181818</td>\n      <td>0.141593</td>\n      <td>0.101695</td>\n      <td>0.133333</td>\n      <td>0.206897</td>\n      <td>0.100000</td>\n    </tr>\n    <tr>\n      <th>41</th>\n      <td>1</td>\n      <td>0.222222</td>\n      <td>0.224390</td>\n      <td>0.140303</td>\n      <td>0.090909</td>\n      <td>0.167053</td>\n      <td>0.151515</td>\n      
<td>0.246512</td>\n      <td>0.146747</td>\n      <td>0.259709</td>\n      <td>...</td>\n      <td>0.154506</td>\n      <td>0.142415</td>\n      <td>0.085202</td>\n      <td>0.160920</td>\n      <td>0.191489</td>\n      <td>0.141593</td>\n      <td>0.153846</td>\n      <td>0.121495</td>\n      <td>0.149606</td>\n      <td>0.138462</td>\n    </tr>\n  </tbody>\n</table>\n<p>42 rows × 27 columns</p>\n</div>"
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# # Read the weight tables\n",
    "# tf1 = tf.copy()\n",
    "# cols = tf.columns.drop('Attrition')\n",
    "# for item in cols.values:\n",
    "#     tf_weight = pd.read_excel('../data/ana_data.xlsx',sheet_name=item)\n",
    "#     tf1 = pd.merge(tf1,tf_weight[[item,'ratio']],on=item)\n",
    "#     tf1.drop(item,axis='columns',inplace=True)\n",
    "#     tf1.rename(columns={'ratio':item},inplace=True)\n",
    "# tf1"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T03:17:30.621655600Z",
     "start_time": "2025-06-07T03:17:30.103497700Z"
    }
   },
   "id": "96eb14b6cf05ce32"
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "f088fb4069c8b896"
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "outputs": [
    {
     "data": {
      "text/plain": "0.45625000000000004"
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# x = tf1.drop('Attrition',axis='columns')\n",
    "# y = tf1['Attrition']\n",
    "# y_pre = es.predict(x)\n",
    "# roc_auc_score(y,y_pre)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T03:19:25.413059300Z",
     "start_time": "2025-06-07T03:19:25.407361700Z"
    }
   },
   "id": "e3fa6a8b838f7924"
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 350 entries, 0 to 349\n",
      "Data columns (total 27 columns):\n",
      " #   Column                    Non-Null Count  Dtype  \n",
      "---  ------                    --------------  -----  \n",
      " 0   Attrition                 350 non-null    int64  \n",
      " 1   Age                       350 non-null    float64\n",
      " 2   BusinessTravel            350 non-null    float64\n",
      " 3   Department                350 non-null    float64\n",
      " 4   DistanceFromHome          350 non-null    float64\n",
      " 5   Education                 350 non-null    float64\n",
      " 6   EducationField            350 non-null    float64\n",
      " 7   EnvironmentSatisfaction   350 non-null    float64\n",
      " 8   JobInvolvement            350 non-null    float64\n",
      " 9   JobLevel                  350 non-null    float64\n",
      " 10  JobRole                   350 non-null    float64\n",
      " 11  JobSatisfaction           350 non-null    float64\n",
      " 12  MaritalStatus             350 non-null    float64\n",
      " 13  MonthlyIncome             350 non-null    float64\n",
      " 14  NumCompaniesWorked        350 non-null    float64\n",
      " 15  OverTime                  350 non-null    float64\n",
      " 16  PercentSalaryHike         350 non-null    float64\n",
      " 17  PerformanceRating         350 non-null    float64\n",
      " 18  RelationshipSatisfaction  350 non-null    float64\n",
      " 19  StockOptionLevel          350 non-null    float64\n",
      " 20  TotalWorkingYears         350 non-null    float64\n",
      " 21  TrainingTimesLastYear     350 non-null    float64\n",
      " 22  WorkLifeBalance           350 non-null    float64\n",
      " 23  YearsAtCompany            350 non-null    float64\n",
      " 24  YearsInCurrentRole        350 non-null    float64\n",
      " 25  YearsSinceLastPromotion   350 non-null    float64\n",
      " 26  YearsWithCurrManager      350 non-null    float64\n",
      "dtypes: float64(26), int64(1)\n",
      "memory usage: 74.0 KB\n"
     ]
    }
   ],
   "source": [
    "tf1 = tf.copy()\n",
    "cols = tf1.columns.drop('Attrition')\n",
    "for item in cols.values:\n",
    "    tf2 =pd.crosstab(tf1[item],tf1['Attrition']).reset_index()\n",
    "    tf2['ratio'] = tf2[1] / (tf2[1]+ tf2[0])\n",
    "    tf1 = pd.merge(tf1,tf2[[item,'ratio']],on=item)\n",
    "    tf1.drop(item,axis='columns',inplace=True)\n",
    "    tf1.rename(columns={'ratio':item},inplace=True)\n",
    "tf1.info()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T03:24:58.970846900Z",
     "start_time": "2025-06-07T03:24:58.874268500Z"
    }
   },
   "id": "2d0d452d316f294b"
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "outputs": [
    {
     "data": {
      "text/plain": "0.9811320754716981"
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = tf1.drop('Attrition',axis='columns')\n",
    "y = tf1['Attrition']\n",
    "y_pre = es.predict(x)\n",
    "roc_auc_score(y,y_pre)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-06-07T03:25:14.298018300Z",
     "start_time": "2025-06-07T03:25:14.293268700Z"
    }
   },
   "id": "ce62879dfc83f8de"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
